با توجه به داده بانک جهانی به سوالات زیر پاسخ دهید. برای استفاده از داده از سه فایل زیر استفاده نمایید. داده نام کشورها: WDICountry داده نام سری های زمانی: WDISeries داده کل: WDIData در صورتی که داده را در اختیار ندارید می توانید از بسته WDI استفاده نموده و داده های مورد نظر را استخراج نمایید.


library(WDI)
## Loading required package: RJSONIO
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyverse")
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ readr   1.1.1
## ✔ tibble  1.4.2     ✔ purrr   0.2.5
## ✔ tidyr   0.8.1     ✔ stringr 1.3.0
## ✔ ggplot2 3.0.0     ✔ forcats 0.2.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library("stringr")
library(ggplot2)
# Fetch the World Bank WDI bulk CSV archive and unpack it under `wdi_dir`.
# Each step is skipped when its output is already on disk.
# URL of the zip file
wdi_url <- "http://databank.worldbank.org/data/download/WDI_csv.zip"
# location to save data
dst_dir <- "data"
dir.create(dst_dir, showWarnings = FALSE, recursive = TRUE)
dst_file <- file.path(dst_dir, "WDI_csv.zip")
# directory to unzip the contents of WDI_csv into
wdi_dir <- file.path(dst_dir, "WDI")
# Download only when there is no local copy of the archive. The archive is
# binary, so request a binary transfer explicitly.
if (!file.exists(dst_file)) {
  download.file(wdi_url, dst_file, mode = "wb")
}
# Extraction is checked independently of the download: a zip that is present
# while the extracted data file is missing (deleted, or a failed earlier
# unzip) must still be unpacked.
if (!file.exists(file.path(wdi_dir, "WDIData.csv"))) {
  unzip(dst_file, exdir = wdi_dir)
}

I use *https://uw-pols501.github.io/2017/wdi_data_analysis.html* as an example of how to use the WDI data. Importing the data:

# Country metadata table from the extracted WDI directory.
wdi_country <- file.path(wdi_dir, "WDICountry.csv") %>% read_csv()
## Warning: Missing column names filled in: 'X32' [32]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   `National accounts reference year` = col_integer(),
##   `Latest industrial data` = col_integer(),
##   `Latest trade data` = col_integer(),
##   `Latest water withdrawal data` = col_integer()
## )
## See spec(...) for full column specifications.
# Main data table: one row per (country, indicator), one column per year.
wdi_data <- file.path(wdi_dir, "WDIData.csv") %>% read_csv()
## Warning: Missing column names filled in: 'X63' [63]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character(),
##   `Indicator Name` = col_character(),
##   `Indicator Code` = col_character(),
##   X63 = col_character()
## )
## See spec(...) for full column specifications.
# Series (indicator) metadata. This one is read with base read.csv, so its
# column names are made syntactic (e.g. `Indicator.Name`).
wdi_series <- file.path(wdi_dir, "WDISeries.csv") %>% read.csv()
# Remove the unnamed trailing column that was auto-named `X63` on import.
wdi_data <- select(wdi_data, -X63)

۱. ده کشور فقیر دنیا را بیابید. نمودار درآمد روزانه آنها را رسم کنید. چند درصد از اعضای این کشورها زیر خط فقر هستند؟ متوسط عمر در این کشورها چقدر است؟

Ten poorest: I use “Adjusted net national income per capita (current US$)” for the year 2016 for this part:

# Rows of the per-capita adjusted net national income series.
gdp_cap <- filter(
  wdi_data,
  `Indicator Name` == "Adjusted net national income per capita (current US$)"
)
# Sort ascending by the 2016 value and keep the first ten rows.
ten_poorest <- gdp_cap %>%
  select(`Country Name`, income = `2016`) %>%
  arrange(income) %>%
  slice(1:10)
ten_poorest
## # A tibble: 10 x 2
##    `Country Name`           income
##    <chr>                     <dbl>
##  1 Malawi                      201
##  2 Burundi                     202
##  3 Liberia                     241
##  4 Niger                       292
##  5 Mozambique                  297
##  6 Central African Republic    299
##  7 Gambia, The                 319
##  8 Congo, Dem. Rep.            332
##  9 Madagascar                  358
## 10 Sierra Leone                362

Daily income plot: dividing the yearly income by 365 gives the daily income:

# Turn the yearly income into a per-day figure (yearly value / 365).
daily_income <- mutate(ten_poorest, income = income / 365)
# One bar per country; x-axis labels are rotated so long names stay legible.
ggplot(daily_income) +
  geom_bar(aes(x = `Country Name`, y = `income`), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Percent below the poverty line: as there are a lot of NaNs in the data, I compute the mean of the poverty headcount ratio across all years.

  # Poverty headcount ratio (national poverty lines) for the ten poorest
  # countries. Columns 2-4 (country code, indicator name, indicator code) are
  # dropped so only `Country Name` and the yearly values remain.
  temp <- wdi_data %>%
    filter(
      `Indicator Name` ==
        "Poverty headcount ratio at national poverty lines (% of population)",
      `Country Name` %in% ten_poorest$`Country Name`
    ) %>%
    select(-c(2, 3, 4))
  # Average each country's ratio over the years that have a value, then plot.
  data.frame(
    ID = temp$`Country Name`,
    percent = rowMeans(select(temp, -`Country Name`), na.rm = TRUE)
  ) %>%
    ggplot() +
    geom_bar(aes(x = ID, y = percent), stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

Average life expectancy at birth: I use year 2016 data for this, as there is no data for 2017, and life expectancy at birth as the measure for this part:

library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
  # Life expectancy at birth for the ten poorest countries, taken from the
  # 2016 column as the accompanying text states. Averaging every year since
  # 1960 would mix in decades-old values and understate the current figure.
  temp <- wdi_data %>%
    filter(
      `Indicator Name` == "Life expectancy at birth, total (years)",
      `Country Name` %in% ten_poorest$`Country Name`
    ) %>%
    select(`Country Name`, Life_exp = `2016`)
  # One bar per country; x-axis labels are rotated so long names stay legible.
  temp %>%
    ggplot() +
    geom_bar(aes(x = `Country Name`, y = Life_exp), stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

***

۲. تراژدی روآندا: بر اساس داده های امید به زندگی ابتدا نمودار سالانه نمودار جعبه ایی امید به زندگی کشورها را رسم نمایید(در یک نمودار!). سپس سری زمانی امید به زندگی روآندا را به آن اضافه کنید. چه می بینید؟ چند میلیون نفر کشته شدند؟

# Life expectancy at birth in wide form: `Country Name` followed by one column
# per year (columns 2-4 are identifier columns and are dropped).
Life_exp <- wdi_data %>%
  filter(`Indicator Name` == "Life expectancy at birth, total (years)") %>%
  select(-c(2, 3, 4))
# Long form: one row per (country, year).
life_exp_long <- gather(Life_exp, "year", "life expect", -1)
# One box per year across ALL entries, with Rwanda's own series overlaid as a
# line. Filtering to Rwanda before the boxplot would leave a single value per
# year, so no distribution would be shown.
# `year` is a character column (discrete axis), so the line needs an explicit
# group to connect the points.
ggplot(life_exp_long, aes(x = year, y = `life expect`)) +
  geom_boxplot() +
  geom_line(
    data = filter(life_exp_long, `Country Name` == "Rwanda"),
    aes(group = 1),
    color = "red",
    size = 1
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

As can be seen, there is a very sharp decrease in life expectancy after 1986, in the years leading up to and including Rwanda’s genocide (1994). From what I found, about 1 million people died in those years.


۳. نمودار امید به زندگی و هزینه های بهداشتی را رسم کنید. چه نتیجه ایی می گیرید؟

I use 2015 data for now: Current health expenditure per capita (current US$)

 # 2015 life expectancy per entry, sorted by name.
 Life_exp <- wdi_data %>%
   filter(`Indicator Name` == "Life expectancy at birth, total (years)") %>%
   select(`Country Name`, Life_exp = `2015`) %>%
   arrange(`Country Name`)

 # 2015 health expenditure per capita, sorted the same way. The value column
 # keeps the name `Life_exp`, as in the select() call.
 hel_expcap <- wdi_data %>%
   filter(
     `Indicator Name` == "Current health expenditure per capita (current US$)"
   ) %>%
   select(`Country Name`, Life_exp = `2015`) %>%
   arrange(`Country Name`)

 # The two series are paired by row position, which relies on both tables
 # listing the same entries in the same sorted order.
 life2helexp <- data.frame(
   Life_exp = Life_exp$Life_exp,
   hel_expcap = hel_expcap$Life_exp
 )

 # Scatter plot (hollow circles) with a linear fit.
 p <- ggplot(life2helexp, aes(x = Life_exp, y = hel_expcap)) +
   geom_point(shape = 1) +
   geom_smooth(method = lm) +
   xlab("life expectacy ") +
   ylab("Current health expenditure per capita (current US$)")

 p
## Warning: Removed 40 rows containing non-finite values (stat_smooth).
## Warning: Removed 40 rows containing missing values (geom_point).

As you can see, a high life expectancy goes together with much higher health expenditure per capita. For middle and low life expectancy (less than 70), health expenditure does not appear to have a significant effect on life expectancy.

You can see a clearer trend with the Current health expenditure (% of GDP) data.

# 2015 health expenditure as a share of GDP, sorted by name. The value column
# keeps the name `Life_exp`, as in the select() call.
hel_expcap <- wdi_data %>%
  filter(`Indicator Name` == "Current health expenditure (% of GDP)") %>%
  select(`Country Name`, Life_exp = `2015`) %>%
  arrange(`Country Name`)

# Paired by row position with the existing `Life_exp` table, which relies on
# both tables listing the same entries in the same sorted order.
life2helexp <- data.frame(
  Life_exp = Life_exp$Life_exp,
  hel_expcap = hel_expcap$Life_exp
)

# Scatter plot (hollow circles) with a linear fit. The y-axis label names the
# series actually plotted here (% of GDP), not the per-capita series.
p <- ggplot(life2helexp, aes(x = Life_exp, y = hel_expcap)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm) +
  xlab("life expectacy ") +
  ylab("Current health expenditure (% of GDP)")
p
## Warning: Removed 40 rows containing non-finite values (stat_smooth).
## Warning: Removed 40 rows containing missing values (geom_point).


۴. آیا قدرت خرید خانواده های ایرانی در ۵۰ سال اخیر افزایش یافته است؟ برای این کار از داده های اقتصادی خانوار استفاده کنید.

I use Households and NPISHs final consumption expenditure as a measure for this:

# Single series for Iran, selected by indicator code and country code.
temp_irn_hh <- filter(
  wdi_data,
  `Indicator Code` == "NE.CON.PRVT.KN",
  `Country Code` == "IRN"
)
temp_irn_hh
## # A tibble: 1 x 62
##   `Country Name`  `Country Code` `Indicator Name` `Indicator Code`  `1960`
##   <chr>           <chr>          <chr>            <chr>              <dbl>
## 1 Iran, Islamic … IRN            Households and … NE.CON.PRVT.KN   2.33e¹⁴
## # ... with 57 more variables: `1961` <dbl>, `1962` <dbl>, `1963` <dbl>,
## #   `1964` <dbl>, `1965` <dbl>, `1966` <dbl>, `1967` <dbl>, `1968` <dbl>,
## #   `1969` <dbl>, `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>,
## #   `1974` <dbl>, `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>,
## #   `1979` <dbl>, `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>,
## #   `1984` <dbl>, `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>,
## #   `1989` <dbl>, `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>,
## #   `1994` <dbl>, `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
## #   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>,
## #   `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>,
## #   `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>,
## #   `2014` <dbl>, `2015` <dbl>, `2016` <dbl>, `2017` <dbl>
# Plot Iran's household consumption against year.
# Year columns are picked by name (four-digit headers) rather than by fixed
# position, and the values are read as numbers directly instead of going
# through a character matrix and back.
iran_year_cols <- grep("^[0-9]{4}$", names(temp_irn_hh), value = TRUE)
p3 <- data.frame(
  year = as.integer(iran_year_cols),
  hh_con = as.numeric(unlist(temp_irn_hh[1, iran_year_cols], use.names = FALSE))
) %>%
  ggplot() +
  geom_point(aes(x = year, y = hh_con))
p3
## Warning: Removed 1 rows containing missing values (geom_point).

As can be seen, yes: Iran's household consumption shows a significant increase compared with 50 years ago. ***

۵. رشد اقتصادی ایران را با کشورهای دیگر در طول ۲۰ سال گذشته بر حسب بیست شاخص های اقتصادی مهم مانند تولید ناخالص ملی، تورم و … ارزیابی کنید! (برای هر شاخص از تصویرسازی استفاده کنید.)

I will use the rows for the 20 years up to 2016 (1997–2016) that do not contain any NaN value, and then select 20 of them that are available for both the world and Iran.

uni_topic <- unique(wdi_series$Topic)
# Names of every series whose topic contains "Economic".
ec_indic <- wdi_series$Indicator.Name[grepl("Economic", wdi_series$Topic)]

# Find the indicators that have no missing values.
# World and Iran rows only, keeping the four identifier columns plus
# columns 42-61.
temp_ir <- wdi_data %>%
  filter(`Country Name` %in% c("World", "Iran, Islamic Rep.")) %>%
  select(c(1, 2, 3, 4, 42:61))
# Keep only rows without any missing value in the selected columns.
temp_ir <- temp_ir[complete.cases(temp_ir), ]
# Show the complete rows that belong to an economic series.
temp_ir[temp_ir$`Indicator Name` %in% ec_indic, ]
## # A tibble: 562 x 24
##    `Country Name` `Country Code` `Indicator Name`         `Indicator Code`
##    <chr>          <chr>          <chr>                    <chr>           
##  1 World          WLD            Adjusted net national i… NY.ADJ.NNTY.KD.…
##  2 World          WLD            Adjusted net national i… NY.ADJ.NNTY.KD  
##  3 World          WLD            Adjusted net national i… NY.ADJ.NNTY.CD  
##  4 World          WLD            Adjusted net national i… NY.ADJ.NNTY.PC.…
##  5 World          WLD            Adjusted net national i… NY.ADJ.NNTY.PC.…
##  6 World          WLD            Adjusted net national i… NY.ADJ.NNTY.PC.…
##  7 World          WLD            Adjusted net savings, e… NY.ADJ.SVNX.GN.…
##  8 World          WLD            Adjusted net savings, i… NY.ADJ.SVNG.GN.…
##  9 World          WLD            Adjusted savings: carbo… NY.ADJ.DCO2.GN.…
## 10 World          WLD            Adjusted savings: consu… NY.ADJ.DKAP.GN.…
## # ... with 552 more rows, and 20 more variables: `1997` <dbl>,
## #   `1998` <dbl>, `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>,
## #   `2003` <dbl>, `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>,
## #   `2008` <dbl>, `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>,
## #   `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>
# Hand-picked economic indicators used for the Iran vs. World comparison.
imp_economy <- c(
  "Inflation, consumer prices (annual %)",
  "GDP (constant 2010 US$)",
  "Adjusted net national income (annual % growth)",
  "Adjusted net savings, excluding particulate emission damage (% of GNI)",
  "Adjusted net savings, including particulate emission damage (% of GNI)",
  "Employers, total (% of total employment) (modeled ILO estimate)",
  "Communications, computer, etc. (% of service exports, BoP)",
  "GDP, PPP (current international $)",
  "General government final consumption expenditure (% of GDP)",
  "GNI (current US$)",
  "Gross domestic savings (% of GDP)",
  "Imports of goods and services (% of GDP)",
  "Stocks traded, total value (% of GDP)",
  "Domestic credit to private sector (% of GDP)",
  "Industry, value added (% of GDP)",
  "Listed domestic companies, total",
  "Expense (% of GDP)",
  "Charges for the use of intellectual property, payments (BoP, current US$)",
  "Market capitalization of listed domestic companies (% of GDP)",
  "Revenue, excluding grants (% of GDP)",
  "Net ODA received (% of GNI)"
)
# Rows of the selected indicators for World and Iran.
wdi_ec_irworld <- wdi_data %>%
  filter(
    `Indicator Name` %in% imp_economy,
    `Country Name` %in% c("World", "Iran, Islamic Rep.")
  )
# For the last 20 years: identifier columns plus columns 43-62.
wdi_ec_irworld <- select(wdi_ec_irworld, 1, 2, 3, 4, 43:62)
wdi_ec_irworld$`Indicator Name`
##  [1] "Adjusted net national income (annual % growth)"                           
##  [2] "Adjusted net savings, excluding particulate emission damage (% of GNI)"   
##  [3] "Adjusted net savings, including particulate emission damage (% of GNI)"   
##  [4] "Charges for the use of intellectual property, payments (BoP, current US$)"
##  [5] "Communications, computer, etc. (% of service exports, BoP)"               
##  [6] "Domestic credit to private sector (% of GDP)"                             
##  [7] "Employers, total (% of total employment) (modeled ILO estimate)"          
##  [8] "Expense (% of GDP)"                                                       
##  [9] "GDP (constant 2010 US$)"                                                  
## [10] "GDP, PPP (current international $)"                                       
## [11] "General government final consumption expenditure (% of GDP)"              
## [12] "GNI (current US$)"                                                        
## [13] "Gross domestic savings (% of GDP)"                                        
## [14] "Imports of goods and services (% of GDP)"                                 
## [15] "Inflation, consumer prices (annual %)"                                    
## [16] "Listed domestic companies, total"                                         
## [17] "Market capitalization of listed domestic companies (% of GDP)"            
## [18] "Net ODA received (% of GNI)"                                              
## [19] "Revenue, excluding grants (% of GDP)"                                     
## [20] "Stocks traded, total value (% of GDP)"                                    
## [21] "Adjusted net national income (annual % growth)"                           
## [22] "Adjusted net savings, excluding particulate emission damage (% of GNI)"   
## [23] "Adjusted net savings, including particulate emission damage (% of GNI)"   
## [24] "Charges for the use of intellectual property, payments (BoP, current US$)"
## [25] "Communications, computer, etc. (% of service exports, BoP)"               
## [26] "Domestic credit to private sector (% of GDP)"                             
## [27] "Employers, total (% of total employment) (modeled ILO estimate)"          
## [28] "Expense (% of GDP)"                                                       
## [29] "GDP (constant 2010 US$)"                                                  
## [30] "GDP, PPP (current international $)"                                       
## [31] "General government final consumption expenditure (% of GDP)"              
## [32] "GNI (current US$)"                                                        
## [33] "Gross domestic savings (% of GDP)"                                        
## [34] "Imports of goods and services (% of GDP)"                                 
## [35] "Inflation, consumer prices (annual %)"                                    
## [36] "Listed domestic companies, total"                                         
## [37] "Market capitalization of listed domestic companies (% of GDP)"            
## [38] "Net ODA received (% of GNI)"                                              
## [39] "Revenue, excluding grants (% of GDP)"                                     
## [40] "Stocks traded, total value (% of GDP)"
wdi_ec_irworld$`Country Name` <- as.factor(wdi_ec_irworld$`Country Name`)
# Draw one line chart per indicator, with one line per country.
# The table holds a row for each (country, indicator) pair, so the indicator
# column repeats every name once per country. Looping over the distinct names
# draws each chart exactly once instead of once per country.
for (each in unique(wdi_ec_irworld$`Indicator Name`)) {
  p <- wdi_ec_irworld %>%
    filter(`Indicator Name` == each) %>%
    select(-c(2, 3, 4)) %>%
    gather("year", "indic", -1) %>%
    ggplot(aes(
      x = year, y = indic,
      group = `Country Name`, color = `Country Name`
    )) +
    geom_line(size = 2) +
    ggtitle(each)
  print(p)
}
## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 18 rows containing missing values (geom_path).

## Warning: Removed 18 rows containing missing values (geom_path).

## Warning: Removed 17 rows containing missing values (geom_path).

## Warning: Removed 17 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 9 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 9 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 18 rows containing missing values (geom_path).

## Warning: Removed 18 rows containing missing values (geom_path).

## Warning: Removed 17 rows containing missing values (geom_path).

## Warning: Removed 17 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 9 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 9 rows containing missing values (geom_path).


۶. در قسمت قبل با استفاده از روش خوشه بندی k-means داده ها را به سه دسته تقسیم کنید. ایران در کدام دسته می گنجد؟ (پیش از خوشه بندی طبیعتا داده را باید پاکسازی و استاندارد سازی نمایید.)

I cluster the means of the indicators over all years for which we have values. As I said before, I use indicators for which not all years are NaN. I use all countries instead of the world aggregate for this purpose.

# Cluster all entries on their per-indicator averages over the available years.
all_con <- wdi_data %>% filter(`Indicator Name` %in% imp_economy)
# One row per (country, indicator) with the mean over every year column
# (columns 1-4 are identifiers); missing years are ignored.
ec_data <- data.frame(
  coun = all_con$`Country Name`,
  indic = all_con$`Indicator Name`,
  indic_num = rowMeans(all_con %>% select(-c(1:4)), na.rm = TRUE)
)
# Wide form: one row per country, one column per indicator.
ec_data_sp <- spread(ec_data, key = indic, value = indic_num)
# Drop countries that lack a value for any indicator.
ec_data_sp <- ec_data_sp[complete.cases(ec_data_sp), ]
# Standardize every indicator column (column 1 is the country name).
ec_data_sp[, -1] <- scale(ec_data_sp[, -1])
# k-means starts from random centres: fix the seed so the cluster numbering
# is the same on every run, and use several starts to avoid a poor local
# optimum from a single unlucky initialisation.
set.seed(1)
clus <- kmeans(ec_data_sp[, -1], centers = 3, nstart = 25)
data.frame(country = ec_data_sp$coun, cluster = clus$cluster) %>%
  arrange(cluster)
##                                               country cluster
## 1                                            Botswana       1
## 2                      Central Europe and the Baltics       1
## 3                                             Croatia       1
## 4                                              Cyprus       1
## 5                                    Egypt, Arab Rep.       1
## 6                                           Euro area       1
## 7                                      European Union       1
## 8       Europe & Central Asia (excluding high income)       1
## 9        Europe & Central Asia (IDA & IBRD countries)       1
## 10                                             Israel       1
## 11                                            Jamaica       1
## 12                                             Jordan       1
## 13                                              Kenya       1
## 14                                             Kuwait       1
## 15                                            Lebanon       1
## 16 Middle East & North Africa (excluding high income)       1
## 17  Middle East & North Africa (IDA & IBRD countries)       1
## 18                                            Namibia       1
## 19                                               Oman       1
## 20                                   Papua New Guinea       1
## 21                                           Slovenia       1
## 22                                       South Africa       1
## 23                                          Swaziland       1
## 24                                            Tunisia       1
## 25                                            Ukraine       1
## 26                                            Uruguay       1
## 27                              Europe & Central Asia       2
## 28                                        High income       2
## 29                                      North America       2
## 30                                       OECD members       2
## 31                          Post-demographic dividend       2
## 32                                              World       2
## 33                                          Argentina       3
## 34                                            Bahrain       3
## 35                                         Bangladesh       3
## 36                                              Chile       3
## 37                                           Colombia       3
## 38                                         Costa Rica       3
## 39                         Early-demographic dividend       3
## 40                                              Ghana       3
## 41                                          IDA blend       3
## 42                                              India       3
## 43                                          Indonesia       3
## 44                                 Iran, Islamic Rep.       3
## 45                                         Kazakhstan       3
## 46                                        Korea, Rep.       3
## 47                                Lower middle income       3
## 48                                           Malaysia       3
## 49                                          Mauritius       3
## 50                                             Mexico       3
## 51                                            Morocco       3
## 52                                            Nigeria       3
## 53                                           Pakistan       3
## 54                                           Paraguay       3
## 55                                               Peru       3
## 56                                        Philippines       3
## 57                                              Qatar       3
## 58                                          Singapore       3
## 59                                         South Asia       3
## 60                            South Asia (IDA & IBRD)       3
## 61                                          Sri Lanka       3
## 62                                 Sub-Saharan Africa       3
## 63         Sub-Saharan Africa (excluding high income)       3
## 64          Sub-Saharan Africa (IDA & IBRD countries)       3
## 65                                           Tanzania       3
## 66                                           Thailand       3
## 67                                             Turkey       3
## 68                                           Zimbabwe       3

Iran is in the middle-income group, alongside Bangladesh, Costa Rica and other countries.


۷. به وسیله تحلیل مولفه اصلی بعد داده رو به دو کاهش دهید سپس خوشه های به دست آمده در قسمت قبل را بر روی آن نمایش دهید. آیا عملکرد روش خوشه بندی شما مطلوب بوده است؟

# Project the standardized indicators onto their first two principal
# components (column 1 of `ec_data_sp` is the country name).
comp <- prcomp(ec_data_sp[, -1])
pc_ec <- data.frame(con = ec_data_sp$coun, pc = comp$x[, 1:2])
# Colour the points by the clusters already obtained from the k-means fit on
# the full indicator set (`clus`). Re-running k-means on the two components
# would produce a different partition and could not show how well the earlier
# clustering separates in the reduced space. `clus` was fitted on the same
# rows of `ec_data_sp`, so the labels align by position.
pc_ec$pc_clus <- as.factor(clus$cluster)
# Label Iran so its cluster can be read from the plot whatever number it got.
ggplot(pc_ec, aes(x = pc.PC1, y = pc.PC2)) +
  geom_point(aes(color = pc_clus)) +
  geom_text(
    data = pc_ec[pc_ec$con == "Iran, Islamic Rep.", ],
    aes(label = con),
    vjust = -1
  )

Group 1 here is Iran's group. As we can see, my features and clustering algorithm did well on the data. ***

۸. با استفاده از داده روشی برای پیش بینی رشد اقتصادی ایران در سال آینده ارائه دهید.

I think that by fitting a regression model on the time series of the indicators we have here, we can predict next year's indicators and economic growth. Here, the response variable would be the value t years ahead of each year, and the past years would be our predictors.


۹. سوالهای ۵ تا ۷ را ابتدا برای ۲۰ شاخص سلامت سپس بر حسب ۲۰ شاخص آموزشی تکرار کنید.

Health: find indicators that are available for both Iran and the world. As before, use features without NaN over the last 20 years.

uni_topic <- unique(wdi_series$Topic)
# Names of every series whose topic contains "Health".
he_indic <- wdi_series$Indicator.Name[grepl("Health", wdi_series$Topic)]
he_indic
##   [1] Proportion of women subjected to physical and/or sexual violence in the last 12 months (% of women age 15-49)                  
##   [2] Women who believe a husband is justified in beating his wife when she argues with him (%)                                      
##   [3] Women who believe a husband is justified in beating his wife when she burns the food (%)                                       
##   [4] Women who believe a husband is justified in beating his wife when she goes out without telling him (%)                         
##   [5] Women who believe a husband is justified in beating his wife when she neglects the children (%)                                
##   [6] Women who believe a husband is justified in beating his wife (any of five reasons) (%)                                         
##   [7] Women who believe a husband is justified in beating his wife when she refuses sex with him (%)                                 
##   [8] Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)                           
##   [9] Prevalence of anemia among women of reproductive age (% of women ages 15-49)                                                   
##  [10] Prevalence of anemia among children (% of children under 5)                                                                    
##  [11] Prevalence of anemia among non-pregnant women (% of women ages 15-49)                                                          
##  [12] Condom use, population ages 15-24, female (% of females ages 15-24)                                                            
##  [13] Condom use, population ages 15-24, male (% of males ages 15-24)                                                                
##  [14] Number of deaths ages 5-14 years                                                                                               
##  [15] Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)                          
##  [16] Number of infant deaths                                                                                                        
##  [17] Cause of death, by injury (% of total)                                                                                         
##  [18] Number of under-five deaths                                                                                                    
##  [19] Cause of death, by non-communicable diseases (% of total)                                                                      
##  [20] Number of neonatal deaths                                                                                                      
##  [21] Probability of dying at age 5-14 years (per 1,000 children age 5)                                                              
##  [22] Women's share of population ages 15+ living with HIV (%)                                                                       
##  [23] Prevalence of HIV, total (% of population ages 15-49)                                                                          
##  [24] Mortality rate, under-5 (per 1,000 live births)                                                                                
##  [25] Mortality rate, under-5, female (per 1,000 live births)                                                                        
##  [26] Mortality rate, under-5, male (per 1,000 live births)                                                                          
##  [27] Mortality from CVD, cancer, diabetes or CRD between exact ages 30 and 70 (%)                                                   
##  [28] Mortality rate, neonatal (per 1,000 live births)                                                                               
##  [29] Demand for family planning satisfied by modern methods (% of married women with demand for family planning)                    
##  [30] People using at least basic drinking water services, rural (% of rural population)                                             
##  [31] People using at least basic drinking water services, urban (% of urban population)                                             
##  [32] People using at least basic drinking water services (% of population)                                                          
##  [33] People using safely managed drinking water services, rural (% of rural population)                                             
##  [34] People using safely managed drinking water services, urban (% of urban population)                                             
##  [35] People using safely managed drinking water services (% of population)                                                          
##  [36] Children (0-14) living with HIV                                                                                                
##  [37] Prevalence of HIV, female (% ages 15-24)                                                                                       
##  [38] Prevalence of HIV, male (% ages 15-24)                                                                                         
##  [39] Antiretroviral therapy coverage (% of people living with HIV)                                                                  
##  [40] Adults (ages 15+) newly infected with HIV                                                                                      
##  [41] Children (ages 0-14) newly infected with HIV                                                                                   
##  [42] Adults (ages 15+) and children (ages 0-14) newly infected with HIV                                                             
##  [43] Incidence of HIV (% of uninfected population ages 15-49)                                                                       
##  [44] Antiretroviral therapy coverage for PMTCT (% of pregnant women living with HIV)                                                
##  [45] Immunization, HepB3 (% of one-year-old children)                                                                               
##  [46] Immunization, DPT (% of children ages 12-23 months)                                                                            
##  [47] Immunization, measles (% of children ages 12-23 months)                                                                        
##  [48] Hospital beds (per 1,000 people)                                                                                               
##  [49] Community health workers (per 1,000 people)                                                                                    
##  [50] Nurses and midwives (per 1,000 people)                                                                                         
##  [51] Physicians (per 1,000 people)                                                                                                  
##  [52] Specialist surgical workforce (per 100,000 population)                                                                         
##  [53] Incidence of malaria (per 1,000 population at risk)                                                                            
##  [54] Use of insecticide-treated bed nets (% of under-5 population)                                                                  
##  [55] Children with fever receiving antimalarial drugs (% of children under age 5 with fever)                                        
##  [56] Number of maternal deaths                                                                                                      
##  [57] Lifetime risk of maternal death (1 in: rate varies by country)                                                                 
##  [58] Lifetime risk of maternal death (%)                                                                                            
##  [59] Prevalence of anemia among pregnant women (%)                                                                                  
##  [60] Smoking prevalence, total (ages 15+)                                                                                           
##  [61] Smoking prevalence, females (% of adults)                                                                                      
##  [62] Smoking prevalence, males (% of adults)                                                                                        
##  [63] Risk of catastrophic expenditure for surgical care (% of people at risk)                                                       
##  [64] Risk of impoverishing expenditure for surgical care (% of people at risk)                                                      
##  [65] Number of surgical procedures (per 100,000 population)                                                                         
##  [66] Pregnant women receiving prenatal care (%)                                                                                     
##  [67] ARI treatment (% of children under 5 taken to a health provider)                                                               
##  [68] People using at least basic sanitation services, rural (% of rural population)                                                 
##  [69] People using at least basic sanitation services, urban (% of urban population)                                                 
##  [70] People using at least basic sanitation services (% of population)                                                              
##  [71] Exclusive breastfeeding (% of children under 6 months)                                                                         
##  [72] Births attended by skilled health staff (% of total)                                                                           
##  [73] Low-birthweight babies (% of births)                                                                                           
##  [74] Diabetes prevalence (% of population ages 20 to 79)                                                                            
##  [75] Female genital mutilation prevalence (%)                                                                                       
##  [76] People with basic handwashing facilities including soap and water, rural (% of rural population)                               
##  [77] People with basic handwashing facilities including soap and water, urban (% of urban population)                               
##  [78] People with basic handwashing facilities including soap and water (% of population)                                            
##  [79] Prevalence of underweight, weight for age, female (% of children under 5)                                                      
##  [80] Prevalence of underweight, weight for age, male (% of children under 5)                                                        
##  [81] Prevalence of underweight, weight for age (% of children under 5)                                                              
##  [82] Maternal mortality ratio (modeled estimate, per 100,000 live births)                                                           
##  [83] Maternal mortality ratio (national estimate, per 100,000 live births)                                                          
##  [84] People practicing open defecation, rural (% of rural population)                                                               
##  [85] People practicing open defecation, urban (% of urban population)                                                               
##  [86] People practicing open defecation (% of population)                                                                            
##  [87] Diarrhea treatment (% of children under 5 receiving oral rehydration and continued feeding)                                    
##  [88] Diarrhea treatment (% of children under 5 who received ORS packet)                                                             
##  [89] Prevalence of overweight, weight for height, female (% of children under 5)                                                    
##  [90] Prevalence of overweight, weight for height, male (% of children under 5)                                                      
##  [91] Prevalence of overweight, weight for height (% of children under 5)                                                            
##  [92] People using safely managed sanitation services, rural (% of rural population)                                                 
##  [93] People using safely managed sanitation services, urban  (% of urban population)                                                
##  [94] People using safely managed sanitation services (% of population)                                                              
##  [95] Prevalence of stunting, height for age, female (% of children under 5)                                                         
##  [96] Prevalence of stunting, height for age, male (% of children under 5)                                                           
##  [97] Prevalence of stunting, height for age (% of children under 5)                                                                 
##  [98] Suicide mortality rate (per 100,000 population)                                                                                
##  [99] Mortality caused by road traffic injury (per 100,000 people)                                                                   
## [100] Prevalence of wasting, weight for height, female (% of children under 5)                                                       
## [101] Prevalence of wasting, weight for height, male (% of children under 5)                                                         
## [102] Prevalence of wasting, weight for height (% of children under 5)                                                               
## [103] Prevalence of severe wasting, weight for height, female (% of children under 5)                                                
## [104] Prevalence of severe wasting, weight for height, male (% of children under 5)                                                  
## [105] Prevalence of severe wasting, weight for height (% of children under 5)                                                        
## [106] Tuberculosis treatment success rate (% of new cases)                                                                           
## [107] Tuberculosis case detection rate (%, all forms)                                                                                
## [108] Incidence of tuberculosis (per 100,000 people)                                                                                 
## [109] Increase in poverty gap at $1.90 ($ 2011 PPP) poverty line due to out-of-pocket health care expenditure (USD)                  
## [110] Number of people pushed below the $1.90 ($ 2011 PPP) poverty line by out-of-pocket health care expenditure                     
## [111] Increase in poverty gap at $1.90 ($ 2011 PPP) poverty line due to out-of-pocket health care expenditure (% of poverty line)    
## [112] Proportion of population pushed below the $1.90 ($ 2011 PPP) poverty line by out-of-pocket health care expenditure (%)         
## [113] Increase in poverty gap at $3.10 ($ 2011 PPP) poverty line due to out-of-pocket health care expenditure (USD)                  
## [114] Number of people pushed below the $3.10 ($ 2011 PPP) poverty line by out-of-pocket health care expenditure                     
## [115] Increase in poverty gap at $3.10 ($ 2011 PPP) poverty line due to out-of-pocket health care expenditure (% of poverty line)    
## [116] Proportion of population pushed below the $3.10 ($ 2011 PPP) poverty line by out-of-pocket health care expenditure (%)         
## [117] Number of people spending more than 10% of household consumption or income on out-of-pocket health care expenditure            
## [118] Proportion of population spending more than 10% of household consumption or income on out-of-pocket health care expenditure (%)
## [119] Number of people spending more than 25% of household consumption or income on out-of-pocket health care expenditure            
## [120] Proportion of population spending more than 25% of household consumption or income on out-of-pocket health care expenditure (%)
## [121] UHC service coverage index                                                                                                     
## [122] Newborns protected against tetanus (%)                                                                                         
## [123] Current health expenditure (% of GDP)                                                                                          
## [124] Current health expenditure per capita (current US$)                                                                            
## [125] Current health expenditure per capita, PPP (current international $)                                                           
## [126] External health expenditure (% of current health expenditure)                                                                  
## [127] External health expenditure per capita (current US$)                                                                           
## [128] External health expenditure per capita, PPP (current international $)                                                          
## [129] Domestic general government health expenditure (% of current health expenditure)                                               
## [130] Domestic general government health expenditure (% of GDP)                                                                      
## [131] Domestic general government health expenditure (% of general government expenditure)                                           
## [132] Domestic general government health expenditure per capita (current US$)                                                        
## [133] Domestic general government health expenditure per capita, PPP (current international $)                                       
## [134] Out-of-pocket expenditure (% of current health expenditure)                                                                    
## [135] Out-of-pocket expenditure per capita (current US$)                                                                             
## [136] Out-of-pocket expenditure per capita, PPP (current international $)                                                            
## [137] Domestic private health expenditure (% of current health expenditure)                                                          
## [138] Domestic private health expenditure per capita (current US$)                                                                   
## [139] Domestic private health expenditure per capita, PPP  (current international $)                                                 
## [140] Prevalence of undernourishment (% of population)                                                                               
## [141] Depth of the food deficit (kilocalories per person per day)                                                                    
## [142] Consumption of iodized salt (% of households)                                                                                  
## [143] Vitamin A supplementation coverage rate (% of children ages 6-59 months)                                                       
## [144] Adolescent fertility rate (births per 1,000 women ages 15-19)                                                                  
## [145] Completeness of infant death reporting (% of reported infant deaths to estimated infant deaths)                                
## [146] Completeness of total death reporting (% of reported total deaths to estimated total deaths)                                   
## [147] Mortality rate, adult, female (per 1,000 female adults)                                                                        
## [148] Mortality rate, adult, male (per 1,000 male adults)                                                                            
## [149] Birth rate, crude (per 1,000 people)                                                                                           
## [150] Death rate, crude (per 1,000 people)                                                                                           
## [151] Contraceptive prevalence, modern methods (% of women ages 15-49)                                                               
## [152] Contraceptive prevalence, any methods (% of women ages 15-49)                                                                  
## [153] Mortality rate, infant, female (per 1,000 live births)                                                                         
## [154] Mortality rate, infant (per 1,000 live births)                                                                                 
## [155] Mortality rate, infant, male (per 1,000 live births)                                                                           
## [156] Life expectancy at birth, female (years)                                                                                       
## [157] Life expectancy at birth, total (years)                                                                                        
## [158] Life expectancy at birth, male (years)                                                                                         
## [159] Fertility rate, total (births per woman)                                                                                       
## [160] Survival to age 65, female (% of cohort)                                                                                       
## [161] Survival to age 65, male (% of cohort)                                                                                         
## [162] Wanted fertility rate (births per woman)                                                                                       
## [163] Female headed households (% of households with a female head)                                                                  
## [164] Women who were first married by age 18 (% of women ages 20-24)                                                                 
## [165] Teenage mothers (% of women ages 15-19 who have had children or are currently pregnant)                                        
## [166] Population ages 0-4, female (% of female population)                                                                           
## [167] Population ages 0-4, male (% of male population)                                                                               
## [168] Population ages 0-14, female                                                                                                   
## [169] Population ages 0-14, female (% of total)                                                                                      
## [170] Population ages 0-14, male                                                                                                     
## [171] Population ages 0-14, male (% of total)                                                                                        
## [172] Population ages 0-14, total                                                                                                    
## [173] Population ages 0-14 (% of total)                                                                                              
## [174] Population ages 5-9, female (% of female population)                                                                           
## [175] Population ages 5-9, male (% of male population)                                                                               
## [176] Population ages 10-14, female (% of female population)                                                                         
## [177] Population ages 10-14, male (% of male population)                                                                             
## [178] Population ages 15-19, female (% of female population)                                                                         
## [179] Population ages 15-19, male (% of male population)                                                                             
## [180] Population ages 15-64, female                                                                                                  
## [181] Population ages 15-64, female (% of total)                                                                                     
## [182] Population ages 15-64, male                                                                                                    
## [183] Population ages 15-64, male (% of total)                                                                                       
## [184] Population ages 15-64, total                                                                                                   
## [185] Population ages 15-64 (% of total)                                                                                             
## [186] Population ages 20-24, female (% of female population)                                                                         
## [187] Population ages 20-24, male (% of male population)                                                                             
## [188] Population ages 25-29, female (% of female population)                                                                         
## [189] Population ages 25-29, male (% of male population)                                                                             
## [190] Population ages 30-34, female (% of female population)                                                                         
## [191] Population ages 30-34, male (% of male population)                                                                             
## [192] Population ages 35-39, female (% of female population)                                                                         
## [193] Population ages 35-39, male (% of male population)                                                                             
## [194] Population ages 40-44, female (% of female population)                                                                         
## [195] Population ages 40-44, male (% of male population)                                                                             
## [196] Population ages 45-49, female (% of female population)                                                                         
## [197] Population ages 45-49, male (% of male population)                                                                             
## [198] Population ages 50-54, female (% of female population)                                                                         
## [199] Population ages 50-54, male (% of male population)                                                                             
## [200] Population ages 55-59, female (% of female population)                                                                         
## [201] Population ages 55-59, male (% of male population)                                                                             
## [202] Population ages 60-64, female (% of female population)                                                                         
## [203] Population ages 60-64, male (% of male population)                                                                             
## [204] Population ages 65-69, female (% of female population)                                                                         
## [205] Population ages 65-69, male (% of male population)                                                                             
## [206] Population ages 65 and above, female                                                                                           
## [207] Population ages 65 and above, female (% of total)                                                                              
## [208] Population ages 65 and above, male                                                                                             
## [209] Population ages 65 and above, male (% of total)                                                                                
## [210] Population ages 65 and above, total                                                                                            
## [211] Population ages 65 and above (% of total)                                                                                      
## [212] Population ages 70-74, female (% of female population)                                                                         
## [213] Population ages 70-74, male (% of male population)                                                                             
## [214] Population ages 75-79, female (% of female population)                                                                         
## [215] Population ages 75-79, male (% of male population)                                                                             
## [216] Population ages 80 and above, female (% of female population)                                                                  
## [217] Population ages 80 and above, male (% of male population)                                                                      
## [218] Sex ratio at birth (male births per female births)                                                                             
## [219] Age dependency ratio (% of working-age population)                                                                             
## [220] Age dependency ratio, old (% of working-age population)                                                                        
## [221] Age dependency ratio, young (% of working-age population)                                                                      
## [222] Population growth (annual %)                                                                                                   
## [223] Population, total                                                                                                              
## [224] Population, female                                                                                                             
## [225] Population, female (% of total)                                                                                                
## [226] Population, male                                                                                                               
## [227] Population, male (% of total)                                                                                                  
## [228] Completeness of birth registration, female (%)                                                                                 
## [229] Completeness of birth registration, male (%)                                                                                   
## [230] Completeness of birth registration, rural (%)                                                                                  
## [231] Completeness of birth registration, urban (%)                                                                                  
## [232] Completeness of birth registration (%)                                                                                         
## [233] Completeness of death registration with cause-of-death information (%)                                                         
## [234] Unmet need for contraception (% of married women ages 15-49)                                                                   
## 1580 Levels: 2005 PPP conversion factor, GDP (LCU per international $) ...
# Find health indicators with no missing values in the inspected window.
# Columns 1-4 are the identifier columns (country name/code, indicator
# name/code). With the year columns starting at `1960` in column 5,
# columns 42:61 are the years 1997-2016.
temp_ir <- wdi_data %>%
  filter(`Country Name` %in% c("World", "Iran, Islamic Rep.")) %>%
  select(c(1, 2, 3, 4, 42:61))
# Keep only rows (one country/indicator pair each) without any NA.
temp_ir <- temp_ir[complete.cases(temp_ir), ]
temp_ir <- temp_ir[temp_ir$`Indicator Name` %in% he_indic, ] %>%
  arrange(`Country Name`)

# Take at most 20 distinct indicator names. head() does not pad with NA when
# fewer than 20 rows survive (unlike `[1:20]`), and unique() prevents the same
# indicator from being listed once per country.
imp_health <- head(unique(temp_ir$`Indicator Name`), 20)
imp_health
##  [1] "Adolescent fertility rate (births per 1,000 women ages 15-19)"     
##  [2] "Adults (ages 15+) and children (ages 0-14) newly infected with HIV"
##  [3] "Adults (ages 15+) newly infected with HIV"                         
##  [4] "Age dependency ratio (% of working-age population)"                
##  [5] "Age dependency ratio, old (% of working-age population)"           
##  [6] "Age dependency ratio, young (% of working-age population)"         
##  [7] "Birth rate, crude (per 1,000 people)"                              
##  [8] "Children (0-14) living with HIV"                                   
##  [9] "Children (ages 0-14) newly infected with HIV"                      
## [10] "Death rate, crude (per 1,000 people)"                              
## [11] "Depth of the food deficit (kilocalories per person per day)"       
## [12] "Fertility rate, total (births per woman)"                          
## [13] "Immunization, DPT (% of children ages 12-23 months)"               
## [14] "Immunization, HepB3 (% of one-year-old children)"                  
## [15] "Immunization, measles (% of children ages 12-23 months)"           
## [16] "Incidence of HIV (% of uninfected population ages 15-49)"          
## [17] "Life expectancy at birth, female (years)"                          
## [18] "Life expectancy at birth, male (years)"                            
## [19] "Life expectancy at birth, total (years)"                           
## [20] "Mortality rate, adult, female (per 1,000 female adults)"
# Pull all year columns of the selected health indicators for Iran and the
# World aggregate, then print the result.
wdi_he_irworld <- wdi_data %>%
  filter(
    `Indicator Name` %in% imp_health,
    `Country Name` %in% c("World", "Iran, Islamic Rep.")
  )
wdi_he_irworld
## # A tibble: 40 x 62
##    `Country Name` `Country Code` `Indicator Name`  `Indicator Code` `1960`
##    <chr>          <chr>          <chr>             <chr>             <dbl>
##  1 World          WLD            Adolescent ferti… SP.ADO.TFRT       86.2 
##  2 World          WLD            Adults (ages 15+… SH.HIV.INCD.TL    NA   
##  3 World          WLD            Adults (ages 15+… SH.HIV.INCD       NA   
##  4 World          WLD            Age dependency r… SP.POP.DPND       73.5 
##  5 World          WLD            Age dependency r… SP.POP.DPND.OL     8.59
##  6 World          WLD            Age dependency r… SP.POP.DPND.YG    64.0 
##  7 World          WLD            Birth rate, crud… SP.DYN.CBRT.IN    31.8 
##  8 World          WLD            Children (0-14) … SH.HIV.0014       NA   
##  9 World          WLD            Children (ages 0… SH.HIV.INCD.14    NA   
## 10 World          WLD            Death rate, crud… SP.DYN.CDRT.IN    17.7 
## # ... with 30 more rows, and 57 more variables: `1961` <dbl>,
## #   `1962` <dbl>, `1963` <dbl>, `1964` <dbl>, `1965` <dbl>, `1966` <dbl>,
## #   `1967` <dbl>, `1968` <dbl>, `1969` <dbl>, `1970` <dbl>, `1971` <dbl>,
## #   `1972` <dbl>, `1973` <dbl>, `1974` <dbl>, `1975` <dbl>, `1976` <dbl>,
## #   `1977` <dbl>, `1978` <dbl>, `1979` <dbl>, `1980` <dbl>, `1981` <dbl>,
## #   `1982` <dbl>, `1983` <dbl>, `1984` <dbl>, `1985` <dbl>, `1986` <dbl>,
## #   `1987` <dbl>, `1988` <dbl>, `1989` <dbl>, `1990` <dbl>, `1991` <dbl>,
## #   `1992` <dbl>, `1993` <dbl>, `1994` <dbl>, `1995` <dbl>, `1996` <dbl>,
## #   `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, `2000` <dbl>, `2001` <dbl>,
## #   `2002` <dbl>, `2003` <dbl>, `2004` <dbl>, `2005` <dbl>, `2006` <dbl>,
## #   `2007` <dbl>, `2008` <dbl>, `2009` <dbl>, `2010` <dbl>, `2011` <dbl>,
## #   `2012` <dbl>, `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>,
## #   `2017` <dbl>
# Keep the four identifier columns plus the last 20 year columns
# (positions 43:62 of the 62-column table, i.e. `1998`..`2017`).
# NOTE(review): the completeness check that produced imp_health used columns
# 42:61, so this window is shifted by one year -- confirm this is intended.
wdi_he_irworld <- select(wdi_he_irworld, 1:4, 43:62)
wdi_he_irworld[["Indicator Name"]]
##  [1] "Adolescent fertility rate (births per 1,000 women ages 15-19)"     
##  [2] "Adults (ages 15+) and children (ages 0-14) newly infected with HIV"
##  [3] "Adults (ages 15+) newly infected with HIV"                         
##  [4] "Age dependency ratio (% of working-age population)"                
##  [5] "Age dependency ratio, old (% of working-age population)"           
##  [6] "Age dependency ratio, young (% of working-age population)"         
##  [7] "Birth rate, crude (per 1,000 people)"                              
##  [8] "Children (0-14) living with HIV"                                   
##  [9] "Children (ages 0-14) newly infected with HIV"                      
## [10] "Death rate, crude (per 1,000 people)"                              
## [11] "Depth of the food deficit (kilocalories per person per day)"       
## [12] "Fertility rate, total (births per woman)"                          
## [13] "Immunization, DPT (% of children ages 12-23 months)"               
## [14] "Immunization, HepB3 (% of one-year-old children)"                  
## [15] "Immunization, measles (% of children ages 12-23 months)"           
## [16] "Incidence of HIV (% of uninfected population ages 15-49)"          
## [17] "Life expectancy at birth, female (years)"                          
## [18] "Life expectancy at birth, male (years)"                            
## [19] "Life expectancy at birth, total (years)"                           
## [20] "Mortality rate, adult, female (per 1,000 female adults)"           
## [21] "Adolescent fertility rate (births per 1,000 women ages 15-19)"     
## [22] "Adults (ages 15+) and children (ages 0-14) newly infected with HIV"
## [23] "Adults (ages 15+) newly infected with HIV"                         
## [24] "Age dependency ratio (% of working-age population)"                
## [25] "Age dependency ratio, old (% of working-age population)"           
## [26] "Age dependency ratio, young (% of working-age population)"         
## [27] "Birth rate, crude (per 1,000 people)"                              
## [28] "Children (0-14) living with HIV"                                   
## [29] "Children (ages 0-14) newly infected with HIV"                      
## [30] "Death rate, crude (per 1,000 people)"                              
## [31] "Depth of the food deficit (kilocalories per person per day)"       
## [32] "Fertility rate, total (births per woman)"                          
## [33] "Immunization, DPT (% of children ages 12-23 months)"               
## [34] "Immunization, HepB3 (% of one-year-old children)"                  
## [35] "Immunization, measles (% of children ages 12-23 months)"           
## [36] "Incidence of HIV (% of uninfected population ages 15-49)"          
## [37] "Life expectancy at birth, female (years)"                          
## [38] "Life expectancy at birth, male (years)"                            
## [39] "Life expectancy at birth, total (years)"                           
## [40] "Mortality rate, adult, female (per 1,000 female adults)"
# Plot every health indicator over time, one line per country.
wdi_he_irworld$`Country Name` <- as.factor(wdi_he_irworld$`Country Name`)
# `Indicator Name` has one entry per (country, indicator) row, so each name
# occurs once per country. Loop over the distinct names so that every chart
# is drawn once instead of once per country.
for (each in unique(wdi_he_irworld$`Indicator Name`)) {
  p <- wdi_he_irworld %>%
    filter(`Indicator Name` == each) %>%
    # drop columns 2-4; column 1 (country name) and the value columns remain
    select(-c(2, 3, 4)) %>%
    # long format: one row per (country, year column)
    gather("year", "indic", -1) %>%
    ggplot(aes(x = year, y = indic, group = `Country Name`, color = `Country Name`)) +
    geom_line(size = 2) +
    ggtitle(each)
  print(p)
}
## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 7 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 7 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

kmeans on health data

# K-means clustering of countries on the selected health indicators.
all_con <- wdi_data %>% filter(`Indicator Name` %in% imp_health)
# One row per (country, indicator); the value is the row mean over every
# column after the first four, ignoring missing entries.
he_data <- data.frame(
  coun = all_con$`Country Name`,
  indic = all_con$`Indicator Name`,
  indic_num = rowMeans(all_con %>% select(-c(1:4)), na.rm = TRUE)
)
# Wide format: one row per country, one column per indicator.
he_data_sp <- spread(he_data, key = indic, value = indic_num)
# Keep only the countries that have a value for every indicator.
he_data_sp <- he_data_sp[complete.cases(he_data_sp), ]
# Standardise the indicator columns so that no single unit dominates the distances.
he_data_sp[, -1] <- scale(he_data_sp[, -1])
# kmeans() starts from randomly chosen centres: fix the seed and use several
# restarts so the cluster assignment is reproducible between runs.
set.seed(1)
clus <- kmeans(he_data_sp[, -1], centers = 3, nstart = 25)
data.frame(country = he_data_sp$coun, cluster = clus$cluster) %>% arrange(cluster)
##                     country cluster
## 1               Afghanistan       1
## 2                    Angola       1
## 3                Bangladesh       1
## 4                     Benin       1
## 5                   Bolivia       1
## 6              Burkina Faso       1
## 7                  Cambodia       1
## 8                  Cameroon       1
## 9  Central African Republic       1
## 10                     Chad       1
## 11              Congo, Rep.       1
## 12            Cote d'Ivoire       1
## 13                 Djibouti       1
## 14                 Ethiopia       1
## 15                    Gabon       1
## 16                    Ghana       1
## 17                   Guinea       1
## 18            Guinea-Bissau       1
## 19                    Haiti       1
## 20                  Lao PDR       1
## 21                  Liberia       1
## 22               Madagascar       1
## 23                     Mali       1
## 24               Mauritania       1
## 25                    Nepal       1
## 26                    Niger       1
## 27                  Nigeria       1
## 28                 Pakistan       1
## 29                  Senegal       1
## 30             Sierra Leone       1
## 31                    Sudan       1
## 32                     Togo       1
## 33                   Uganda       1
## 34              Yemen, Rep.       1
## 35                  Algeria       2
## 36                Argentina       2
## 37               Azerbaijan       2
## 38                   Belize       2
## 39                   Brazil       2
## 40               Cabo Verde       2
## 41                    Chile       2
## 42                 Colombia       2
## 43               Costa Rica       2
## 44                     Cuba       2
## 45       Dominican Republic       2
## 46                  Ecuador       2
## 47         Egypt, Arab Rep.       2
## 48              El Salvador       2
## 49                  Georgia       2
## 50                Guatemala       2
## 51                   Guyana       2
## 52                 Honduras       2
## 53                Indonesia       2
## 54       Iran, Islamic Rep.       2
## 55                  Jamaica       2
## 56               Kazakhstan       2
## 57          Kyrgyz Republic       2
## 58                 Malaysia       2
## 59                   Mexico       2
## 60                  Morocco       2
## 61                  Myanmar       2
## 62                Nicaragua       2
## 63                 Paraguay       2
## 64                     Peru       2
## 65              Philippines       2
## 66             Saudi Arabia       2
## 67             South Africa       2
## 68                 Suriname       2
## 69               Tajikistan       2
## 70                 Thailand       2
## 71      Trinidad and Tobago       2
## 72                  Uruguay       2
## 73            Venezuela, RB       2
## 74                  Vietnam       2
## 75                    World       2
## 76                 Botswana       3
## 77              Gambia, The       3
## 78                    Kenya       3
## 79                   Malawi       3
## 80               Mozambique       3
## 81                  Namibia       3
## 82                   Rwanda       3
## 83                Swaziland       3
## 84                 Tanzania       3
## 85                   Zambia       3

Iran now falls in the same cluster as Malaysia, Thailand, South Africa and the World aggregate, which appears to be the middle group in terms of health.

# Principal component analysis of the scaled health indicators, followed by
# k-means on the first two components and a scatter plot of the result.
comp <- prcomp(he_data_sp[, -1])
# Country name plus the scores on PC1 and PC2 (columns pc.PC1 and pc.PC2).
pc_he <- data.frame(con = he_data_sp$coun, pc = comp$x[, 1:2])
# kmeans() starts from randomly chosen centres: fix the seed and use several
# restarts so the clusters (and the plot colours) are reproducible.
set.seed(1)
pc_clus <- kmeans(pc_he[, -1], centers = 3, nstart = 25)
pc_he$pc_clus <- as.factor(pc_clus$cluster)
pc_he %>%
  ggplot() +
  geom_point(aes(x = pc.PC1, y = pc.PC2, color = pc_clus))

Unlike before, the clustering algorithm does not seem to separate the data well based on these features.

Education

# Distinct topic labels found in the series metadata.
uni_topic <- unique(wdi_series$Topic)
# Names of all indicators whose topic contains "Education".
ed_indic <- wdi_series$Indicator.Name[grepl('Education', wdi_series$Topic)]
ed_indic
##   [1] Literacy rate, youth female (% of females ages 15-24)                                                   
##   [2] Literacy rate, youth (ages 15-24), gender parity index (GPI)                                            
##   [3] Literacy rate, youth male (% of males ages 15-24)                                                       
##   [4] Literacy rate, youth total (% of people ages 15-24)                                                     
##   [5] Literacy rate, adult female (% of females ages 15 and above)                                            
##   [6] Literacy rate, adult male (% of males ages 15 and above)                                                
##   [7] Literacy rate, adult total (% of people ages 15 and above)                                              
##   [8] Compulsory education, duration (years)                                                                  
##   [9] School enrollment, primary (gross), gender parity index (GPI)                                           
##  [10] School enrollment, primary and secondary (gross), gender parity index (GPI)                             
##  [11] School enrollment, secondary (gross), gender parity index (GPI)                                         
##  [12] School enrollment, tertiary (gross), gender parity index (GPI)                                          
##  [13] Preprimary education, duration (years)                                                                  
##  [14] Pupil-teacher ratio, preprimary                                                                         
##  [15] School enrollment, preprimary (% gross)                                                                 
##  [16] School enrollment, preprimary, female (% gross)                                                         
##  [17] School enrollment, preprimary, male (% gross)                                                           
##  [18] Trained teachers in preprimary education, female (% of female teachers)                                 
##  [19] Trained teachers in preprimary education, male (% of male teachers)                                     
##  [20] Trained teachers in preprimary education (% of total teachers)                                          
##  [21] Primary school starting age (years)                                                                     
##  [22] Primary completion rate, female (% of relevant age group)                                               
##  [23] Primary completion rate, male (% of relevant age group)                                                 
##  [24] Primary completion rate, total (% of relevant age group)                                                
##  [25] Educational attainment, at least completed primary, population 25+ years, female (%) (cumulative)       
##  [26] Educational attainment, at least completed primary, population 25+ years, male (%) (cumulative)         
##  [27] Educational attainment, at least completed primary, population 25+ years, total (%) (cumulative)        
##  [28] Primary education, duration (years)                                                                     
##  [29] Primary education, pupils                                                                               
##  [30] Primary education, pupils (% female)                                                                    
##  [31] Pupil-teacher ratio, primary                                                                            
##  [32] School enrollment, primary (% gross)                                                                    
##  [33] School enrollment, primary, female (% gross)                                                            
##  [34] School enrollment, primary, male (% gross)                                                              
##  [35] Gross intake ratio in first grade of primary education, female (% of relevant age group)                
##  [36] Gross intake ratio in first grade of primary education, male (% of relevant age group)                  
##  [37] Gross intake ratio in first grade of primary education, total (% of relevant age group)                 
##  [38] School enrollment, primary (% net)                                                                      
##  [39] School enrollment, primary, female (% net)                                                              
##  [40] School enrollment, primary, male (% net)                                                                
##  [41] Net intake rate in grade 1, female (% of official school-age population)                                
##  [42] Net intake rate in grade 1, male (% of official school-age population)                                  
##  [43] Net intake rate in grade 1 (% of official school-age population)                                        
##  [44] Over-age students, primary, female (% of female enrollment)                                             
##  [45] Over-age students, primary, male (% of male enrollment)                                                 
##  [46] Over-age students, primary (% of enrollment)                                                            
##  [47] School enrollment, primary, private (% of total primary)                                                
##  [48] Persistence to grade 5, female (% of cohort)                                                            
##  [49] Persistence to grade 5, male (% of cohort)                                                              
##  [50] Persistence to grade 5, total (% of cohort)                                                             
##  [51] Persistence to last grade of primary, female (% of cohort)                                              
##  [52] Persistence to last grade of primary, male (% of cohort)                                                
##  [53] Persistence to last grade of primary, total (% of cohort)                                               
##  [54] Repeaters, primary, female (% of female enrollment)                                                     
##  [55] Repeaters, primary, male (% of male enrollment)                                                         
##  [56] Repeaters, primary, total (% of total enrollment)                                                       
##  [57] Trained teachers in primary education, female (% of female teachers)                                    
##  [58] Trained teachers in primary education, male (% of male teachers)                                        
##  [59] Trained teachers in primary education (% of total teachers)                                             
##  [60] Primary education, teachers                                                                             
##  [61] Primary education, teachers (% female)                                                                  
##  [62] Adjusted net enrollment rate, primary (% of primary school age children)                                
##  [63] Adjusted net enrollment rate, primary, female (% of primary school age children)                        
##  [64] Adjusted net enrollment rate, primary, male (% of primary school age children)                          
##  [65] Children out of school, primary                                                                         
##  [66] Children out of school, primary, female                                                                 
##  [67] Children out of school, female (% of female primary school age)                                         
##  [68] Children out of school, primary, male                                                                   
##  [69] Children out of school, male (% of male primary school age)                                             
##  [70] Children out of school (% of primary school age)                                                        
##  [71] Lower secondary school starting age (years)                                                             
##  [72] Lower secondary completion rate, female (% of relevant age group)                                       
##  [73] Lower secondary completion rate, male (% of relevant age group)                                         
##  [74] Lower secondary completion rate, total (% of relevant age group)                                        
##  [75] Educational attainment, at least completed lower secondary, population 25+, female (%) (cumulative)     
##  [76] Educational attainment, at least completed lower secondary, population 25+, male (%) (cumulative)       
##  [77] Educational attainment, at least completed lower secondary, population 25+, total (%) (cumulative)      
##  [78] Educational attainment, at least completed post-secondary, population 25+, female (%) (cumulative)      
##  [79] Educational attainment, at least completed post-secondary, population 25+, male (%) (cumulative)        
##  [80] Educational attainment, at least completed post-secondary, population 25+, total (%) (cumulative)       
##  [81] Educational attainment, at least completed upper secondary, population 25+, female (%) (cumulative)     
##  [82] Educational attainment, at least completed upper secondary, population 25+, male (%) (cumulative)       
##  [83] Educational attainment, at least completed upper secondary, population 25+, total (%) (cumulative)      
##  [84] Secondary education, duration (years)                                                                   
##  [85] Secondary education, pupils                                                                             
##  [86] Secondary education, pupils (% female)                                                                  
##  [87] Secondary education, general pupils                                                                     
##  [88] Secondary education, general pupils (% female)                                                          
##  [89] Pupil-teacher ratio, lower secondary                                                                    
##  [90] Pupil-teacher ratio, secondary                                                                          
##  [91] Pupil-teacher ratio, upper secondary                                                                    
##  [92] Secondary education, vocational pupils                                                                  
##  [93] Secondary education, vocational pupils (% female)                                                       
##  [94] School enrollment, secondary (% gross)                                                                  
##  [95] School enrollment, secondary, female (% gross)                                                          
##  [96] School enrollment, secondary, male (% gross)                                                            
##  [97] School enrollment, secondary (% net)                                                                    
##  [98] School enrollment, secondary, female (% net)                                                            
##  [99] School enrollment, secondary, male (% net)                                                              
## [100] School enrollment, secondary, private (% of total secondary)                                            
## [101] Progression to secondary school, female (%)                                                             
## [102] Progression to secondary school, male (%)                                                               
## [103] Progression to secondary school (%)                                                                     
## [104] Trained teachers in secondary education, female (% of female teachers)                                  
## [105] Trained teachers in lower secondary education, female (% of female teachers)                            
## [106] Trained teachers in lower secondary education, male (% of male teachers)                                
## [107] Trained teachers in lower secondary education (% of total teachers)                                     
## [108] Trained teachers in secondary education, male (% of male teachers)                                      
## [109] Trained teachers in upper secondary education, female (% of female teachers)                            
## [110] Trained teachers in upper secondary education, male (% of male teachers)                                
## [111] Trained teachers in upper secondary education (% of total teachers)                                     
## [112] Trained teachers in secondary education (% of total teachers)                                           
## [113] Secondary education, teachers                                                                           
## [114] Secondary education, teachers, female                                                                   
## [115] Secondary education, teachers (% female)                                                                
## [116] Adolescents out of school, female (% of female lower secondary school age)                              
## [117] Adolescents out of school, male (% of male lower secondary school age)                                  
## [118] Adolescents out of school (% of lower secondary school age)                                             
## [119] Educational attainment, at least Bachelor's or equivalent, population 25+, female (%) (cumulative)      
## [120] Educational attainment, at least Bachelor's or equivalent, population 25+, male (%) (cumulative)        
## [121] Educational attainment, at least Bachelor's or equivalent, population 25+, total (%) (cumulative)       
## [122] Educational attainment, Doctoral or equivalent, population 25+, female (%) (cumulative)                 
## [123] Educational attainment, Doctoral or equivalent, population 25+, male (%) (cumulative)                   
## [124] Educational attainment, Doctoral or equivalent, population 25+, total (%) (cumulative)                  
## [125] Educational attainment, at least Master's or equivalent, population 25+, female (%) (cumulative)        
## [126] Educational attainment, at least Master's or equivalent, population 25+, male (%) (cumulative)          
## [127] Educational attainment, at least Master's or equivalent, population 25+, total (%) (cumulative)         
## [128] Educational attainment, at least completed short-cycle tertiary, population 25+, female (%) (cumulative)
## [129] Educational attainment, at least completed short-cycle tertiary, population 25+, male (%) (cumulative)  
## [130] Educational attainment, at least completed short-cycle tertiary, population 25+, total (%) (cumulative) 
## [131] Pupil-teacher ratio, tertiary                                                                           
## [132] School enrollment, tertiary (% gross)                                                                   
## [133] School enrollment, tertiary, female (% gross)                                                           
## [134] School enrollment, tertiary, male (% gross)                                                             
## [135] Tertiary education, academic staff (% female)                                                           
## [136] Current education expenditure, primary (% of total expenditure in primary public institutions)          
## [137] Current education expenditure, secondary (% of total expenditure in secondary public institutions)      
## [138] Current education expenditure, tertiary (% of total expenditure in tertiary public institutions)        
## [139] Current education expenditure, total (% of total expenditure in public institutions)                    
## [140] All education staff compensation, primary (% of total expenditure in primary public institutions)       
## [141] All education staff compensation, secondary (% of total expenditure in secondary public institutions)   
## [142] All education staff compensation, tertiary (% of total expenditure in tertiary public institutions)     
## [143] All education staff compensation, total (% of total expenditure in public institutions)                 
## [144] Government expenditure per student, primary (% of GDP per capita)                                       
## [145] Expenditure on primary education (% of government expenditure on education)                             
## [146] Government expenditure per student, secondary (% of GDP per capita)                                     
## [147] Expenditure on secondary education (% of government expenditure on education)                           
## [148] Government expenditure per student, tertiary (% of GDP per capita)                                      
## [149] Expenditure on tertiary education (% of government expenditure on education)                            
## [150] Government expenditure on education, total (% of government expenditure)                                
## [151] Government expenditure on education, total (% of GDP)                                                   
## 1580 Levels: 2005 PPP conversion factor, GDP (LCU per international $) ...
# Find education indicators that have no missing values in the chosen columns.
# Rows for World and Iran only: the four identifier columns plus columns 42:61.
temp_ir <- wdi_data %>%
  filter(`Country Name` %in% c("World", "Iran, Islamic Rep.")) %>%
  select(c(1, 2, 3, 4, 42:61))
# Drop every row that has a missing value in any of the kept columns.
temp_ir <- temp_ir[complete.cases(temp_ir), ]
# Restrict to education indicators and sort the rows by country name.
temp_ir <- arrange(
  temp_ir[temp_ir$`Indicator Name` %in% ed_indic, ],
  `Country Name`
)

# Take the indicator names of the first 20 rows.
# NOTE(review): after sorting by country these rows presumably all belong to
# the first country, so completeness for the other one is not guaranteed.
imp_ed <- temp_ir$`Indicator Name`[1:20]
imp_ed
##  [1] "Lower secondary school starting age (years)"                                             
##  [2] "Preprimary education, duration (years)"                                                  
##  [3] "Primary education, duration (years)"                                                     
##  [4] "Primary school starting age (years)"                                                     
##  [5] "Secondary education, duration (years)"                                                   
##  [6] "Adjusted net enrollment rate, primary (% of primary school age children)"                
##  [7] "Adjusted net enrollment rate, primary, female (% of primary school age children)"        
##  [8] "Adjusted net enrollment rate, primary, male (% of primary school age children)"          
##  [9] "Children out of school (% of primary school age)"                                        
## [10] "Children out of school, female (% of female primary school age)"                         
## [11] "Children out of school, male (% of male primary school age)"                             
## [12] "Children out of school, primary"                                                         
## [13] "Children out of school, primary, female"                                                 
## [14] "Children out of school, primary, male"                                                   
## [15] "Gross intake ratio in first grade of primary education, female (% of relevant age group)"
## [16] "Gross intake ratio in first grade of primary education, male (% of relevant age group)"  
## [17] "Gross intake ratio in first grade of primary education, total (% of relevant age group)" 
## [18] "Literacy rate, adult female (% of females ages 15 and above)"                            
## [19] "Literacy rate, adult male (% of males ages 15 and above)"                                
## [20] "Literacy rate, adult total (% of people ages 15 and above)"
# Education indicators for Iran and the World aggregate.
# Filter on imp_ed (the education indicators chosen above). Filtering on
# imp_health here would simply reproduce the health charts in this section.
wdi_ed_irworld <- wdi_data %>%
  filter(`Indicator Name` %in% imp_ed) %>%
  filter(`Country Name` %in% c("World", "Iran, Islamic Rep."))
# Keep the four identifier columns plus 20 value columns (43:62).
# NOTE(review): imp_ed was selected using columns 42:61 -- confirm which
# 20-column window is intended.
wdi_ed_irworld <- wdi_ed_irworld %>% select(1, 2, 3, 4, 43:62)
wdi_ed_irworld$`Indicator Name`
##  [1] "Adolescent fertility rate (births per 1,000 women ages 15-19)"     
##  [2] "Adults (ages 15+) and children (ages 0-14) newly infected with HIV"
##  [3] "Adults (ages 15+) newly infected with HIV"                         
##  [4] "Age dependency ratio (% of working-age population)"                
##  [5] "Age dependency ratio, old (% of working-age population)"           
##  [6] "Age dependency ratio, young (% of working-age population)"         
##  [7] "Birth rate, crude (per 1,000 people)"                              
##  [8] "Children (0-14) living with HIV"                                   
##  [9] "Children (ages 0-14) newly infected with HIV"                      
## [10] "Death rate, crude (per 1,000 people)"                              
## [11] "Depth of the food deficit (kilocalories per person per day)"       
## [12] "Fertility rate, total (births per woman)"                          
## [13] "Immunization, DPT (% of children ages 12-23 months)"               
## [14] "Immunization, HepB3 (% of one-year-old children)"                  
## [15] "Immunization, measles (% of children ages 12-23 months)"           
## [16] "Incidence of HIV (% of uninfected population ages 15-49)"          
## [17] "Life expectancy at birth, female (years)"                          
## [18] "Life expectancy at birth, male (years)"                            
## [19] "Life expectancy at birth, total (years)"                           
## [20] "Mortality rate, adult, female (per 1,000 female adults)"           
## [21] "Adolescent fertility rate (births per 1,000 women ages 15-19)"     
## [22] "Adults (ages 15+) and children (ages 0-14) newly infected with HIV"
## [23] "Adults (ages 15+) newly infected with HIV"                         
## [24] "Age dependency ratio (% of working-age population)"                
## [25] "Age dependency ratio, old (% of working-age population)"           
## [26] "Age dependency ratio, young (% of working-age population)"         
## [27] "Birth rate, crude (per 1,000 people)"                              
## [28] "Children (0-14) living with HIV"                                   
## [29] "Children (ages 0-14) newly infected with HIV"                      
## [30] "Death rate, crude (per 1,000 people)"                              
## [31] "Depth of the food deficit (kilocalories per person per day)"       
## [32] "Fertility rate, total (births per woman)"                          
## [33] "Immunization, DPT (% of children ages 12-23 months)"               
## [34] "Immunization, HepB3 (% of one-year-old children)"                  
## [35] "Immunization, measles (% of children ages 12-23 months)"           
## [36] "Incidence of HIV (% of uninfected population ages 15-49)"          
## [37] "Life expectancy at birth, female (years)"                          
## [38] "Life expectancy at birth, male (years)"                            
## [39] "Life expectancy at birth, total (years)"                           
## [40] "Mortality rate, adult, female (per 1,000 female adults)"
# Plot every indicator in wdi_ed_irworld over time, one line per country.
wdi_ed_irworld$`Country Name` <- as.factor(wdi_ed_irworld$`Country Name`)
# `Indicator Name` has one entry per (country, indicator) row, so each name
# occurs once per country. Loop over the distinct names so that every chart
# is drawn once instead of once per country.
for (each in unique(wdi_ed_irworld$`Indicator Name`)) {
  p <- wdi_ed_irworld %>%
    filter(`Indicator Name` == each) %>%
    # drop columns 2-4; column 1 (country name) and the value columns remain
    select(-c(2, 3, 4)) %>%
    # long format: one row per (country, year column)
    gather("year", "indic", -1) %>%
    ggplot(aes(x = year, y = indic, group = `Country Name`, color = `Country Name`)) +
    geom_line(size = 2) +
    ggtitle(each)
  print(p)
}
## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 7 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 7 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

kmeans on education data

# K-means clustering (k = 3) of countries on their education indicators.

# Keep only the rows of wdi_data whose indicator is listed in imp_ed.
all_con <- wdi_data %>% filter(`Indicator Name` %in% imp_ed)
# Collapse every row to a single number: the mean over all columns after the
# first four (presumably the yearly values; confirm), ignoring NAs.
# Rows with no data at all yield NaN and are dropped further down.
ed_data <- data.frame(
  coun = all_con$`Country Name`,
  indic = all_con$`Indicator Name`,
  indic_num = rowMeans(all_con %>% select(-c(1:4)), na.rm = TRUE)
)
# Wide format: one row per country, one column per indicator.
ed_data_sp <- spread(ed_data, key = indic, value = indic_num)
# Keep only countries that have a value for every indicator
# (complete.cases() treats NaN as missing too).
ed_data_sp <- ed_data_sp[complete.cases(ed_data_sp), ]
# Standardize the indicator columns so none dominates the distance.
ed_data_sp[, -1] <- scale(ed_data_sp[, -1])
# kmeans() starts from randomly chosen centres; fix the seed so the cluster
# assignment (and the table below) is the same on every run.
set.seed(1234)
clus <- kmeans(ed_data_sp[, -1], centers = 3)
data.frame(country = ed_data_sp$coun, cluster = clus$cluster) %>%
  arrange(cluster)
##                            country cluster
## 1                       Bangladesh       1
## 2                            China       1
## 3                         Ethiopia       1
## 4                            India       1
## 5                          Nigeria       1
## 6                         Pakistan       1
## 7                      Afghanistan       2
## 8                           Angola       2
## 9                            Benin       2
## 10                          Bhutan       2
## 11                    Burkina Faso       2
## 12                         Burundi       2
## 13                        Cameroon       2
## 14        Central African Republic       2
## 15                            Chad       2
## 16                         Comoros       2
## 17                Congo, Dem. Rep.       2
## 18                   Cote d'Ivoire       2
## 19               Equatorial Guinea       2
## 20                         Eritrea       2
## 21                     Gambia, The       2
## 22                           Ghana       2
## 23                          Guinea       2
## 24                   Guinea-Bissau       2
## 25                           Haiti       2
## 26                         Liberia       2
## 27                            Mali       2
## 28                      Mauritania       2
## 29                         Morocco       2
## 30                      Mozambique       2
## 31                           Niger       2
## 32                            Oman       2
## 33                Papua New Guinea       2
## 34                          Rwanda       2
## 35                         Senegal       2
## 36                     South Sudan       2
## 37                           Sudan       2
## 38                       Swaziland       2
## 39                        Tanzania       2
## 40                            Togo       2
## 41                     Yemen, Rep.       2
## 42                         Albania       3
## 43                         Algeria       3
## 44             Antigua and Barbuda       3
## 45                       Argentina       3
## 46                         Armenia       3
## 47                           Aruba       3
## 48                      Azerbaijan       3
## 49                         Bahrain       3
## 50                        Barbados       3
## 51                         Belarus       3
## 52                          Belize       3
## 53                         Bolivia       3
## 54                        Botswana       3
## 55                          Brazil       3
## 56               Brunei Darussalam       3
## 57                        Bulgaria       3
## 58                      Cabo Verde       3
## 59                        Cambodia       3
## 60                           Chile       3
## 61                        Colombia       3
## 62                     Congo, Rep.       3
## 63                      Costa Rica       3
## 64                         Croatia       3
## 65                            Cuba       3
## 66                          Cyprus       3
## 67              Dominican Republic       3
## 68                         Ecuador       3
## 69                Egypt, Arab Rep.       3
## 70                     El Salvador       3
## 71                         Estonia       3
## 72                           Gabon       3
## 73                         Georgia       3
## 74                          Greece       3
## 75                         Grenada       3
## 76                       Guatemala       3
## 77                          Guyana       3
## 78                        Honduras       3
## 79                         Hungary       3
## 80                       Indonesia       3
## 81              Iran, Islamic Rep.       3
## 82                            Iraq       3
## 83                          Israel       3
## 84                           Italy       3
## 85                         Jamaica       3
## 86                          Jordan       3
## 87                      Kazakhstan       3
## 88                           Kenya       3
## 89       Korea, Dem. People’s Rep.       3
## 90                          Kuwait       3
## 91                 Kyrgyz Republic       3
## 92                         Lao PDR       3
## 93                          Latvia       3
## 94                         Lebanon       3
## 95                         Lesotho       3
## 96                           Libya       3
## 97                       Lithuania       3
## 98                Macao SAR, China       3
## 99                  Macedonia, FYR       3
## 100                     Madagascar       3
## 101                         Malawi       3
## 102                       Malaysia       3
## 103                       Maldives       3
## 104                          Malta       3
## 105               Marshall Islands       3
## 106                      Mauritius       3
## 107                         Mexico       3
## 108                        Moldova       3
## 109                       Mongolia       3
## 110                     Montenegro       3
## 111                        Myanmar       3
## 112                        Namibia       3
## 113                          Nepal       3
## 114                      Nicaragua       3
## 115                         Panama       3
## 116                       Paraguay       3
## 117                           Peru       3
## 118                    Philippines       3
## 119                         Poland       3
## 120                       Portugal       3
## 121                    Puerto Rico       3
## 122                          Qatar       3
## 123                        Romania       3
## 124             Russian Federation       3
## 125                          Samoa       3
## 126          Sao Tome and Principe       3
## 127                   Saudi Arabia       3
## 128                         Serbia       3
## 129                     Seychelles       3
## 130                   Sierra Leone       3
## 131                       Slovenia       3
## 132                Solomon Islands       3
## 133                   South Africa       3
## 134                          Spain       3
## 135                      Sri Lanka       3
## 136 St. Vincent and the Grenadines       3
## 137                       Suriname       3
## 138           Syrian Arab Republic       3
## 139                     Tajikistan       3
## 140                       Thailand       3
## 141                    Timor-Leste       3
## 142                          Tonga       3
## 143            Trinidad and Tobago       3
## 144                        Tunisia       3
## 145                         Turkey       3
## 146                         Uganda       3
## 147                        Ukraine       3
## 148           United Arab Emirates       3
## 149                        Uruguay       3
## 150                     Uzbekistan       3
## 151                        Vanuatu       3
## 152                  Venezuela, RB       3
## 153             West Bank and Gaza       3
## 154                         Zambia       3
## 155                       Zimbabwe       3

It seems that, in education, Iran falls in the same group as Indonesia, Myanmar, and similar countries, which also do not seem to have good education.

pca on education data

# PCA of the education indicators, followed by k-means (k = 3) on the first
# two principal components and a scatter plot of the result.
# The input is ed_data_sp (the standardized education table built above),
# not the health table he_data_sp, since this section is about education.
comp <- prcomp(ed_data_sp[, -1])
# Object names (comp, pc_he, pc_clus) are kept unchanged in case later code
# refers to them. data.frame() expands the two-column matrix into the
# columns pc.PC1 and pc.PC2.
pc_he <- data.frame(con = ed_data_sp$coun, pc = comp$x[, 1:2])
# kmeans() starts from random centres; fix the seed for a repeatable plot.
set.seed(1234)
pc_clus <- kmeans(pc_he[, -1], centers = 3)
pc_he$pc_clus <- as.factor(pc_clus$cluster)
pc_he %>%
  ggplot() +
  geom_point(aes(x = pc.PC1, y = pc.PC2, color = pc_clus))


۱۰. کشورهای دنیا را بر حسب ۶۰ شاخص اقتصادی، سلامت و آموزش با روش سلسله مراتبی خوشه بندی کرده و دندروگرام آن را رسم نمایید. اگر داده ها بر سه دسته تقسیم شوند ایران در کدام دسته می گنجد؟

# Hierarchical clustering of countries on the combined economy, health and
# education indicators, with a dendrogram and a three-group cut.
imp <- c(imp_economy, imp_health, imp_ed)
all_con <- wdi_data %>% filter(`Indicator Name` %in% imp)
# One value per (country, indicator): the mean over all columns after the
# first four (presumably the yearly values; confirm), ignoring NAs.
ed_data <- data.frame(
  coun = all_con$`Country Name`,
  indic = all_con$`Indicator Name`,
  indic_num = rowMeans(all_con %>% select(-c(1:4)), na.rm = TRUE)
)
# Wide format: one row per country, one column per indicator.
data_sp <- spread(ed_data, key = indic, value = indic_num)
# Keep only countries that have a value for every indicator.
data_sp <- data_sp[complete.cases(data_sp), ]
# Move the country names into the row names so they label the dendrogram
# leaves, then drop the name column to leave a purely numeric table.
rownames(data_sp) <- data_sp[, 1]
data_sp <- data_sp[, -1]
# NOTE(review): unlike the k-means step, the indicators are not standardized
# here, so the Euclidean distance is dominated by large-magnitude indicators.
# Consider scale(data_sp) before dist(); confirm the intended behaviour.
dist <- stats::dist(data_sp, method = "euclidean")
clus <- hclust(dist, method = "complete")
plot(clus)
# Cut the tree into three groups and report the one Iran belongs to
# (NA if Iran was dropped by the complete-cases filter).
clus_groups <- cutree(clus, k = 3)
clus_groups["Iran, Islamic Rep."]

I removed the countries that contain NaN in any of my 60 features. In this dendrogram, it seems that Iran is in the same group as Turkey, Pakistan, and similar countries.


۱۱. سه یافته جالب از داده ها استخراج کنید.